###############################################

# Make sure all of these packages are installed
library(lubridate)
library(sandwich)
library(lmtest)
library(stargazer)
library(knitr)
library(kableExtra)
# library(xtable)
# library(pander)
library(survival)
library(grid)
library(gridExtra)
library(tidyverse)
theme_set(theme_minimal())
# library(mle.tools)

###############################################

# Set the working directory to the path to the unzipped files
setwd("E:/Dropbox/Research/pill/PillUptakeReplication")

###############################################

# The rest should be automatic

# Australia
load(file="AFP_panel.RData")
load(file="AFP_x.RData")
aom = read_csv("Australia_Laws_WithOverseas.csv")

# US
load(file="nfs.RData")
ela18wide = 
  read_csv("ELA18Date.csv", col_types = cols(.default = "c")) %>%
  mutate_at(vars(-State), function(x) ymd(ifelse(str_length(x)==4, paste(x,"07-01"), x)))
ela18 = ela18wide %>% gather(key=ELAcode, value=ELA18Date, -State)
ela19wide = 
  read_csv("ELA19Date.csv", col_types = cols(.default = "c")) %>%
  mutate_at(vars(-State), function(x) ymd(ifelse(str_length(x)==4, paste(x,"07-01"), x)))
ela19 = ela19wide %>% gather(key=ELAcode, value=ELA19Date, -State)
ela20wide = 
  read_csv("ELA20Date.csv", col_types = cols(.default = "c")) %>%
  mutate_at(vars(-State), function(x) ymd(ifelse(str_length(x)==4, paste(x,"07-01"), x)))
ela20 = ela20wide %>% gather(key=ELAcode, value=ELA20Date, -State)
ela = full_join(ela18, ela19) %>% full_join(ela20)
ELAcodes = unique(ela19$ELAcode)
# These dates apply for 18-20 (inclusive) in all states before 1973.
# Coded anything from Roe on as the date of Roe 
# even though minors may not have been able to consent.
# I did this because the nfs data stop in 1971.
AbUS = read_csv("Abortion19Date.csv")


gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

StateFactorLevels = c("NSW", "Vic", "Qld", "WA", "SA", "Tas", "ACT", "NT", "Overseas")

# palette8 = c("#000000", "#E69F00", "#56B4E9", "#009E73", "darkmagenta", "#0072B2", "#D55E00", "#CC79A7")
palette8 = c("#E69F00", "#56B4E9", "#009E73", "darkmagenta", "#0072B2", "#D55E00", "#CC79A7", "111000")
palette9 = c("#E69F00", "#56B4E9", "#009E73", "darkmagenta", "#0072B2", "#D55E00", "#CC79A7", "111000", "#000000")

Color8 = scale_colour_manual(name = "State", values = palette8)
Color9 = scale_colour_manual(name = "State", values = palette9, labels=StateFactorLevels)
palette3 = palette8[1:3]
Color3 = scale_colour_manual(name = "", values = palette3)
palette4 = palette8[1:4]
Color4 = scale_colour_manual(name = "AoM18 before age", values = palette4)
Fill4  = scale_fill_manual(  name = "AoM18 before age", values = palette4)
Shape4 = scale_shape_manual( name = "AoM18 before age", values = c(16, 17, 15, 3))
Color4ELA = scale_colour_manual(name = "ELA before age", values = palette4)
Fill4ELA  = scale_fill_manual(  name = "ELA before age", values = palette4)
Shape4ELA = scale_shape_manual( name = "ELA before age", values = c(16, 17, 15, 3))
palette2 = palette8[c(1:2)]
names(palette2) = c("No","Yes")
Color2 = scale_colour_manual(name = "18-20", values = palette2)
Color2fill = scale_fill_manual(name = "18-20", values = palette2)
Color2linetype = scale_linetype_manual(name = "18-20", values = palette2)
Shape2 = scale_shape_manual(name = "18-20", values = c(16, 17))
Linetype2 = scale_linetype_manual(name = "18-20", values = c(1,2))

Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

ClusterReg <- function(fm, cluster, dfcw=1){
  # R-codes (www.r-project.org) for computing
  # clustered-standard errors. Mahmood Arai, Jan 26, 2008.
  # The arguments of the function are:
  # fitted model, cluster1 and cluster2
  # You need to install libraries sandwich and lmtest
  # reweighting the var-cov matrix for the within model
  # library(sandwich);library(lmtest)
  cluster = eval(substitute(cluster), envir=model.frame(fm))
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)*dfcw
  coeftest(fm, vcovCL)
}
ClusterCI <- function(fm, cluster, parm=NULL, level=.95, dfcw=1){
  # R-codes (www.r-project.org) for computing
  # clustered-standard errors. Mahmood Arai, Jan 26, 2008.
  # The arguments of the function are:
  # fitted model, cluster1 and cluster2
  # You need to install libraries sandwich and lmtest
  # reweighting the var-cov matrix for the within model
  # library(sandwich);library(lmtest)
  cluster = eval(substitute(cluster), envir=model.frame(fm))
  M <- length(unique(cluster))   
  N <- length(cluster)           
  K <- fm$rank                        
  dfc <- (M/(M-1))*((N-1)/(N-K))  
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)*dfcw
  coefci(fm, parm=parm, level=level, vcov=vcovCL) 
}

######################################

afp =
  panel %>%
  group_by(id) %>%
  mutate(Overseas1820 = any(StateCode[Age>=18 & Age<21]=="Overseas"),
         Overseas1920 = any(StateCode[Age>=19 & Age<21]=="Overseas"),
         Overseas1819 = any(StateCode[Age>=18 & Age<20]=="Overseas"),
         Overseas18 = any(StateCode[Age>=18 & Age<19]=="Overseas"),
         Overseas19 = any(StateCode[Age>=19 & Age<20]=="Overseas"),
         Overseas20 = any(StateCode[Age>=20 & Age<21]=="Overseas")) %>%
  group_by(id) %>%
  filter(!Overseas1820) %>%
  summarise(
    BornYear = BornYear[1],
    BornDate = BornDate[1],
    YCprop18 = mean(YC[18<=Age & Age<19]),
    YCprop19 = mean(YC[19<=Age & Age<20]),
    YCprop20 = mean(YC[20<=Age & Age<21]),
    YCprop1820 = mean(YC[18<=Age & Age<21]),
    YCprop1920 = mean(YC[19<=Age & Age<21]),
    YCprop1819 = mean(YC[18<=Age & Age<20]),
    StateMode18 = Mode(StateCode[18<=Age & Age<19]),
    StateMode19 = Mode(StateCode[19<=Age & Age<20]),
    StateMode20 = Mode(StateCode[20<=Age & Age<21]),
    StateMode1820 = Mode(StateCode[18<=Age & Age<21]),
    StateMode1920 = Mode(StateCode[19<=Age & Age<21]),
    StateMode1819 = Mode(StateCode[18<=Age & Age<20]),
    StateCode18 = StateCode18[1],
    StateCode19 = StateCode19[1],
    StateCode20 = StateCode20[1],
    StateCode21 = StateCode21[1],
    PillBefore18 = ifelse(is.na(UsedPillBefore18[1]), 0, UsedPillBefore18[1]),
    PillBefore19 = ifelse(is.na(UsedPillBefore19[1]), 0, UsedPillBefore19[1]),
    PillBefore20 = ifelse(is.na(UsedPillBefore20[1]), 0, UsedPillBefore20[1]),
    PillBefore21 = ifelse(is.na(UsedPillBefore21[1]), 0, UsedPillBefore21[1]),
    PillBefore22 = ifelse(is.na(UsedPillBefore22[1]), 0, UsedPillBefore22[1]))


# End of setup
######################################
# Descriptive figures

PillPropByDateAFP = filter(panel, 10<=Age & Age<=40 & StateCode!="Overseas")
PillPropByDateAFP = PillPropByDateAFP %>% 
  group_by(Date, AgeGroup2) %>% 
  summarise(PillPropByDateAFP = mean(Pill),
            PillCount = sum(Pill),
            Count1820 = length(id),
            PillCountWt = sum(Pill*scwt, na.rm=T),
            Count1820Wt = sum(scwt))
# PillPropByDateAFP$PillPropByDateAFP = PillPropByDateAFP$PillCount/PillPropByDateAFP$Count1820
PillPropByDateAFP$PillPropByDateAFPWt = PillPropByDateAFP$PillCountWt/PillPropByDateAFP$Count1820Wt
# PillPropByDateAFP$RibbonLower = qbinom(.025, PillPropByDateAFP$Count1820, PillPropByDateAFP$PillPropByDateAFP)/PillPropByDateAFP$Count1820
# PillPropByDateAFP$RibbonUpper = qbinom(.975, PillPropByDateAFP$Count1820, PillPropByDateAFP$PillPropByDateAFP)/PillPropByDateAFP$Count1820
# PillPropByDateAFP$RibbonLowerWt = qbinom(.025, PillPropByDateAFP$Count1820, PillPropByDateAFP$PillPropByDateAFPWt)/PillPropByDateAFP$Count1820
# PillPropByDateAFP$RibbonUpperWt = qbinom(.975, PillPropByDateAFP$Count1820, PillPropByDateAFP$PillPropByDateAFPWt)/PillPropByDateAFP$Count1820
ggplot(PillPropByDateAFP %>% filter(AgeGroup2!="10-15"), 
       aes(x=Date, y=PillPropByDateAFP, group=AgeGroup2, color=AgeGroup2))+
  # geom_ribbon(aes(ymin=RibbonLowerWt, ymax=RibbonUpperWt),
  #             alpha=.1,linetype=0)+
  geom_line()+
  geom_line(data =PillPropByDateAFP %>% filter(AgeGroup2=="18-20"), 
            aes(x=Date, y=PillPropByDateAFP), size=1.2)+
  scale_y_continuous("Proportion of women using the pill")+
  scale_x_date("", limits=ymd(c("1961-01-01","1982-01-01")))+
  annotate("text", 
           x=ymd(paste(c(1974,1973, 1981, 1976),"-01-01",sep="")), 
           y=c(.1,.485,.365,.285),
           label=c("16-17","18-20","21-30","31-40"))+
  theme(legend.position = "none")+
  coord_cartesian(ylim = c(0,.5))+
  # Make the subcaptions closer to the figure they go with
  theme(plot.margin = unit(c(1,.1,0,.1), "cm"))
ggsave("PillPropByDate.pdf", width=6.5, height=3.5)

PillPropByDateNFS = 
  nfspanel %>%
  group_by(Date, AgeGroup) %>%
  summarise(Pill=mean(Pill, na.rm=T))
AgeLabs = c("16-17", "18-20", "21-30", "31-40")
AgeLabX = ymd(c("1970-10-20", "1970-04-30", "1967-03-01", "1970-10-20"))
AgeLabY = c(.12, .25, .33, .172)

ggplot(PillPropByDateNFS %>% 
         filter(Date<ymd("1970-01-01"), 
                AgeGroup%in%c("16-17", "18-20", "21-30", "31-40")), 
         aes(x=Date, y=Pill, group=AgeGroup, color=AgeGroup)) +
  geom_line() +
  geom_line(data =PillPropByDateNFS %>% filter(Date<ymd("1970-01-01"), AgeGroup=="18-20"), 
            aes(x=Date, y=Pill), size=1.2)+
  scale_x_date("", limits=ymd(c("1961-01-01","1982-01-01")))+
  scale_y_continuous("Proportion of women using the pill") +
  # coord_cartesian(ylim=c(0,.6))
  # coord_cartesian(xlim = ymd(c("1961-01-01", "1982-01-01")), expand = TRUE, ylim = c(0,.5)) +
  annotate("text", x=AgeLabX, y=AgeLabY, label=AgeLabs)+
  theme(legend.position = "none")+
  coord_cartesian(ylim = c(0,.5))+
  # Make the subcaptions closer to the figure they go with
  theme(plot.margin = unit(c(1,.1,0,.1), "cm"))
ggsave("PillPropByDateNFS.pdf", width=6.5, height=3.5)


AoMDisplayTable =
  aom %>%
  filter(StateCode!="Overseas") %>%
  mutate(Date = ymd(paste(AoMLawChangeYear, AoMLawChangeMonth,"01",sep="-"))) %>%
  arrange(Date) %>%
  mutate(`State or territory` = StateName,
         `AoM Commenced` = paste(AoMLawChangeDay, month.abb[AoMLawChangeMonth], AoMLawChangeYear),
         `MM Commenced` = c("1 May 1987", rep("6 May 1992", 7)))%>%
  select(`State or territory`, `AoM Commenced`, `MM Commenced`)

kable(AoMDisplayTable, format="latex", linesep = "",
      booktabs=T,
      caption='Dates of age of majority laws and mature minor doctrines by state and territory (MM is "mature minor doctrine" and AoM is "age of majority of 18" unless otherwise specified)') %>%
  kable_styling(latex_options = "striped")

CountsByStateDateAFP = 
  panel %>%
  group_by(StateCode, Date) %>%
  summarise(Count = sum(!is.na(id))) %>%
  mutate(StateFactor = factor(StateCode, StateFactorLevels))
CountLabelsAFP = 
  CountsByStateDateAFP %>%
  filter(Date==ymd("1985-12-01"))
CountLabelsNT = 
  CountsByStateDateAFP %>%
  filter(Date==ymd("1984-12-01") & StateCode=="NT")
CountLabelsAFP = rbind(CountLabelsAFP, CountLabelsNT)
CountLabelsAFP$date = ymd("1987-06-01")
CountLabelsAFP$date[CountLabelsAFP$StateCode=="NSW"] = ymd("1987-10-20")
CountLabelsAFP$date[CountLabelsAFP$StateCode=="ACT"] = ymd("1987-10-01")
CountLabelsAFP$date[CountLabelsAFP$StateCode=="Overseas"] = ymd("1961-01-01")
CountLabelsAFP$Count[CountLabelsAFP$StateCode=="Overseas"] = 
  CountsByStateDateAFP$Count[CountsByStateDateAFP$Date==ymd("1962-01-01") & CountsByStateDateAFP$StateCode=="Overseas"] +30
CountLabelsAFP$date[CountLabelsAFP$StateCode=="NT"] = ymd("1985-03-01")
CountLabelsAFP$Count[CountLabelsAFP$StateCode=="NT"] = -20

ggplot(CountsByStateDateAFP %>% filter(Date<ymd("1986-05-01")), 
       aes(x=Date, y=Count, group=StateFactor, color=StateFactor))+
  geom_line() +
  scale_x_date("") +
  # scale_y_continuous("Count") +
  annotate("text", size=2.5,
           x=CountLabelsAFP$date, 
           y=CountLabelsAFP$Count, 
           label=CountLabelsAFP$StateCode) +
  Color9 +
  # scale_colour_grey() +
  # theme(legend.position = "None") + 
  geom_vline(xintercept=ymd("1966-10-01"), linetype="dashed", color="grey50") +
  annotate("text", x=ymd("1963-08-01"), y=850, label= "Oct 1966", size=2.5) +
  theme(legend.position = "none")
ggsave("CountsByStateDateAFP.pdf", width = 6.5, height=4)

CountsByStateAgeAFP = 
  panel %>%
  mutate(AgeMonths = round(AgeMonths)) %>%
  group_by(StateCode, AgeMonths) %>%
  summarise(Count = sum(!is.na(id))) %>%
  mutate(StateFactor = factor(StateCode, StateFactorLevels))
CountsByStateAgeAFPLabels = 
  CountsByStateAgeAFP %>%
  filter(AgeMonths==1)
CountsByStateAgeAFPLabels$AgeMonths = -5

ggplot(CountsByStateAgeAFP, 
       aes(x=AgeMonths, y=Count, group=StateFactor, color=StateFactor))+
  geom_line() +
  scale_x_continuous("Age in months", breaks=seq(0,600,120)) +
  geom_vline(xintercept=18*12, linetype="dashed", color="grey50") +
  geom_vline(xintercept=21*12, linetype="dashed", color="grey50")+
  annotate("text", x=c(18*12-2, 21*12+2), y=c(850,850), label= c("18", "21"), size=2.5, hjust=c(1,0)) +
  Color9 +
  # scale_colour_grey() +
  theme(legend.position = "None") + 
  annotate("text", size=2.5,
           x=CountsByStateAgeAFPLabels$AgeMonths,
           y=CountsByStateAgeAFPLabels$Count,
           label=CountsByStateAgeAFPLabels$StateCode,
           hjust = 1) +
  # geom_vline(xintercept=ymd("1966-10-01"), linetype="dashed", color="grey50") +
  coord_cartesian(xlim=c(-30,600))
ggsave("CountsByStateAgeAFP.pdf", width = 6.5, height=4)

CountsByBornYear = 
  x %>% 
  group_by(BornYear) %>%
  summarize(Count=length(id)) %>%
  ungroup %>%
  mutate(Sample = "AFP")

ggplot(CountsByBornYear, aes(x=BornYear, y=Count))+
  geom_line() +
  scale_x_continuous("Year Born") +
  annotate("line", x=c(1953, 1953), y=c(20,97), color="grey50", linetype="dashed") +
  annotate("line", x=c(1960, 1960), y=c(20,97), color="grey50", linetype="dashed") +
  annotate("text", x=1953, y=19, label="Turn 18", size=2.5) +
  annotate("text", x=1953, y=16, label="in 1971", size=2.5) +
  annotate("text", x=1960, y=19, label="Turn 18", size=2.5) +
  annotate("text", x=1960, y=16, label="in 1978", size=2.5) 
ggsave("CountsByBornYearAFP.pdf", width = 6.5, height=3.5)


PillPropByMoSinceAoMAFP = panel %>%
  filter(18<=Age & Age<21 & StateCode!="Overseas") %>%
  filter(Date>=ymd("1966-10-01")) %>% 
  group_by(MonthsSinceAoM) %>% 
  summarise(PillProp = mean(Pill, na.rm=T),
            PillCount = sum(Pill, na.rm=T),
            Count1820 = length(id),
            PillCountWt = sum(Pill*scwt, na.rm=T),
            Count1820Wt = sum(scwt)) %>%
  ungroup %>%
  mutate(PillPropWt = PillCountWt/Count1820Wt,
         PillPropWt = PillCountWt/Count1820Wt,
         RibbonLower = qbinom(.025, Count1820, PillProp)/Count1820,
         RibbonUpper = qbinom(.975, Count1820, PillProp)/Count1820,
         RibbonLowerWt = qbinom(.025, Count1820, PillPropWt)/Count1820,
         RibbonUpperWt = qbinom(.975, Count1820, PillPropWt)/Count1820,
         Sample = "AFP")

ggplot(PillPropByMoSinceAoMAFP, 
       aes(x=MonthsSinceAoM, y=PillProp, ymin=RibbonLower, ymax=RibbonUpper))+
  geom_vline(xintercept = 0, linetype="dashed", color="grey50") + 
  # geom_ribbon(alpha=.1, linetype=0)+
  geom_line(size=1)+
  geom_line(aes(x=MonthsSinceAoM, y=RibbonLower), 
            color="grey50", size=.4)+
  geom_line(aes(x=MonthsSinceAoM, y=RibbonUpper), 
            color="grey50", size=.4)+
  # Color2 +
  # Color2fill +
  scale_y_continuous("Prop. using the pill", limits=c(0,.6))+
  scale_x_continuous("Years since AoM lowered to 18 from 21", limits=c(-5*12,5*12), 
                     breaks=seq(-5*12, 5*12, 12), labels=seq(-5, 5, 1))+
  coord_cartesian(ylim = c(0,.65))+
  theme(plot.margin = unit(c(0,0,0,.1), "cm"))
  # theme(axis.title.y = element_text(size=rel(.8)))
ggsave("PillPropByMoSinceAoMAFP.pdf", width = 6.5, height=1.9)


# By age
PillPropByMoSinceAoMAFP19 = panel %>%
  filter(18<=Age & Age<21 & StateCode!="Overseas") %>%
  filter(Date>=ymd("1966-10-01")) %>%
  mutate(AgeRound = floor(Age)) %>%
  group_by(MonthsSinceAoM, AgeRound) %>% 
  summarise(PillProp = mean(Pill, na.rm=T),
            PillCount = sum(Pill, na.rm=T),
            Count1820 = length(id),
            PillCountWt = sum(Pill*scwt, na.rm=T),
            Count1820Wt = sum(scwt)) %>%
  ungroup %>%
  mutate(PillPropWt = PillCountWt/Count1820Wt,
         PillPropWt = PillCountWt/Count1820Wt,
         RibbonLower = qbinom(.025, Count1820, PillProp)/Count1820,
         RibbonUpper = qbinom(.975, Count1820, PillProp)/Count1820,
         RibbonLowerWt = qbinom(.025, Count1820, PillPropWt)/Count1820,
         RibbonUpperWt = qbinom(.975, Count1820, PillPropWt)/Count1820,
         Sample = "AFP")

ggplot(PillPropByMoSinceAoMAFP19 %>% filter(AgeRound==19), 
       aes(x=MonthsSinceAoM, y=PillProp, ymin=RibbonLower, ymax=RibbonUpper))+
  geom_vline(xintercept = 0, linetype="dashed", color="grey50") + 
  # geom_ribbon(alpha=.1, linetype=0)+
  geom_line(size=1)+
  geom_line(aes(x=MonthsSinceAoM, y=RibbonLower), 
            color="grey50", size=.4)+
  geom_line(aes(x=MonthsSinceAoM, y=RibbonUpper), 
            color="grey50", size=.4)+
  # Color2 +
  # Color2fill +
  scale_y_continuous("Prop. using the pill")+
  scale_x_continuous("Years since AoM lowered to 18 from 21", limits=c(-5*12,5*12), 
                     breaks=seq(-5*12, 5*12, 12), labels=seq(-5, 5, 1))+
  coord_cartesian(ylim = c(0,.65))+
  theme(plot.margin = unit(c(1,0,0,.1), "cm"))
# theme(axis.title.y = element_text(size=rel(.8)))
ggsave("PillPropByMoSinceAoMAFP19.pdf", width = 6.5, height=2.3)

panelTemp = 
  panel %>% 
  select(id, StateMode1820) %>%
  group_by(id) %>%
  filter(row_number()==1) %>%
  ungroup
PillByAgeAfterAoM = x %>%
  select(id, BornYear, BornDate, PillBefore18:PillBefore21) %>%
  left_join(panelTemp) %>%
  left_join(aom %>% rename(StateMode1820=StateCode)) %>%
  mutate(AgeInYearAfterAoM = round(
    interval(floor_date(BornDate, unit="months"), 
             floor_date(dmy(Commenced), unit="months")
    )%/% months(1) / 12)) %>%
  filter(AgeInYearAfterAoM<30, AgeInYearAfterAoM>10) %>%
  group_by(AgeInYearAfterAoM) %>%
  summarise(PillBefore18 = mean(PillBefore18),
            PillBefore19 = mean(PillBefore19),
            PillBefore20 = mean(PillBefore20),
            PillBefore21 = mean(PillBefore21))

PillStartByYrSinceAoMAFP = 
  panel %>% 
  filter(18<=Age, Age<21, StateCode!="Overseas", Date>=ymd("1966-10-01")) %>%
  # Drop post-Pill obervations
  filter(Age<AgeAtFirstPillCensored+(1/100))%>%
  select(id, Date, StateCode, BornDate, BornYear, Pill, AoMLawChangeDate) %>%
  # group_by(id) %>%
  # mutate(AoMLawChangeDate = Mode(AoMLawChangeDate)) %>%
  # ungroup %>%
  mutate(
    MonthsSinceAoM = interval(floor_date(AoMLawChangeDate, unit="months")-months(1), Date)%/%months(1),
    YearsSinceAoM = ceiling(MonthsSinceAoM/12)) %>% 
  # Pooled across the country (years) (married/unmarried)
  group_by(YearsSinceAoM) %>% 
  summarise(PillProp = mean(Pill, na.rm=T)*12,
            PillCount = sum(Pill, na.rm=T),
            Count1820 = pmax(round(length(id)/12),1)) %>%
  ungroup %>%
  mutate(RibbonLower = qbinom(.025, Count1820, PillProp)/Count1820,
         RibbonUpper = qbinom(.975, Count1820, PillProp)/Count1820,
         # RibbonLower = PillProp+qnorm(.025)*((PillProp*(1-PillProp))/(Count1820-1))^.5,
         # RibbonUpper = PillProp+qnorm(.975)*((PillProp*(1-PillProp))/(Count1820-1))^.5,
         ELAcode = "AFP",
         Sample = "AFP")

ggplot(PillStartByYrSinceAoMAFP, 
       aes(x=YearsSinceAoM, y=PillProp, ymin=RibbonLower, ymax=RibbonUpper))+
  geom_vline(xintercept = 0, linetype="dashed", color="grey50")+ 
  # geom_ribbon(alpha=.1, linetype=0)+
  geom_line(linetype=2)+
  geom_line(aes(x=YearsSinceAoM, y=RibbonLower), linetype=2, color="grey50")+
  geom_line(aes(x=YearsSinceAoM, y=RibbonUpper), linetype=2, color="grey50")+
  geom_point()+
  scale_y_continuous("Prop. of at-risk women aged 18-20")+
  scale_x_continuous("Years since AoM lowerd to 18 from 21", breaks = seq(-8,8,2), limits=c(-8,8))+
  coord_cartesian(ylim = c(0,.5)) +
  theme(axis.title.y = element_text(size=rel(.8)))
ggsave("PillStartByYrSinceAoM.pdf", width = 6.5, height=3)



###########################
###########################
## Initial Pill Utake (AFP)


############
# ages 18-20 and 21-23 each pooled 

AgeValues = 10:36
BornYearMin = 1944
BornYearMax = 1966

# RegUptakeCombined
panel1820 = 
  panel %>%
  # rename(Age=age) %>%
  filter(18<=Age, Age<21, Age<AgeAtFirstPillCensored+(1/100), BornYearMin<=BornYear, BornYear<=BornYearMax) %>%
  select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, StateCode, BornDate, BornYear, After, MarriedBefore) %>%
  na.omit %>%
  # mutate(AgeFactor = factor(as.integer(floor(Age)))) %>%
  group_by(id) %>%
  filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
  ungroup %>%
  mutate(
    StateFactor = factor(StateCode),
    BornFactor = factor(BornYear),
    # AgeFactor = factor(as.integer(floor(Age))),
    # AgeMonthsFactor = factor(AgeMonths),
    Months = interval(min(BornDate), BornDate)%/%months(1))
panel19 = 
  panel %>%
  # rename(Age=age) %>%
  filter(19<=Age, Age<20, Age<AgeAtFirstPillCensored+(1/100), BornYearMin<=BornYear, BornYear<=BornYearMax) %>%
  select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, StateCode, BornDate, BornYear, After, MarriedBefore) %>%
  na.omit %>%
  # mutate(AgeFactor = factor(as.integer(floor(Age)))) %>%
  group_by(id) %>%
  filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
  ungroup %>%
  mutate(
    StateFactor = factor(StateCode),
    BornFactor = factor(BornYear),
    # AgeFactor = factor(as.integer(floor(Age))),
    # AgeMonthsFactor = factor(AgeMonths),
    Months = interval(min(BornDate), BornDate)%/%months(1))
panel1819 = 
  panel %>%
  # rename(Age=age) %>%
  filter(18<=Age, Age<20, Age<AgeAtFirstPillCensored+(1/100), BornYearMin<=BornYear, BornYear<=BornYearMax) %>%
  select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, StateCode, BornDate, BornYear, After, MarriedBefore) %>%
  na.omit %>%
  # mutate(AgeFactor = factor(as.integer(floor(Age)))) %>%
  group_by(id) %>%
  filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
  ungroup %>%
  mutate(
    StateFactor = factor(StateCode),
    BornFactor = factor(BornYear),
    # AgeFactor = factor(as.integer(floor(Age))),
    # AgeMonthsFactor = factor(AgeMonths),
    Months = interval(min(BornDate), BornDate)%/%months(1))
panel2123 = 
  panel %>%
  # rename(Age=age) %>%
  filter(21<=Age, Age<23, Age<AgeAtFirstPillCensored+(1/100), BornYearMin<=BornYear, BornYear<=BornYearMax) %>%
  select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, StateCode, BornDate, BornYear, After, MarriedBefore) %>%
  na.omit %>%
  # mutate(AgeFactor = factor(as.integer(floor(Age)))) %>%
  group_by(id) %>%
  filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
  ungroup %>%
  mutate(
    StateFactor = factor(StateCode),
    BornFactor = factor(BornYear),
    # AgeFactor = factor(as.integer(floor(Age))),
    # AgeMonthsFactor = factor(AgeMonths),
    Months = interval(min(BornDate), BornDate)%/%months(1))

RegUptakeInput = crossing(
  AgeRange = c("18-20", "21-23"),
  LT = c(TRUE, FALSE),
  Haz = c(TRUE, FALSE)) 

RegUptakeFunc = function(arg){
  dt = get(paste0("panel",   str_replace(arg$AgeRange, "-", "")))
  
  if(arg$Haz){
    if(arg$LT){
      RegTemp = 
        coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~After+BornFactor+StateFactor + cluster(StateCode), data=dt)
      # RegTemp = lm(FirstPill~After+BornFactor+StateFactor+AgeMonthsFactor,data=dt)
    }else{
      RegTemp = 
        coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~After+BornFactor+StateFactor*Months + cluster(StateCode), data=dt)
    }
    tibble(
      AgeRange = arg$AgeRange,
      Haz = arg$Haz,
      LT = arg$LT,
      Coef = RegTemp$coefficients[["After"]],
      SE = coef(summary(RegTemp))["After", "robust se"],
      SEUnclustered = coef(summary(RegTemp))["After", "se(coef)"],
      tstat = coef(summary(RegTemp))["After", "z"],
      pval = coef(summary(RegTemp))["After", "Pr(>|z|)"],
      nobs = RegTemp$n,
      nsize = length(unique(dt$id)))
  }else{
    if(arg$LT){
      RegTemp = lm(FirstPill~After+BornFactor+StateFactor,data=dt)
      # RegTemp = lm(FirstPill~After+BornFactor+StateFactor+AgeMonthsFactor,data=dt)
    }else{
      RegTemp = lm(FirstPill~After+BornFactor+StateFactor*Months,data=dt)
    }
    RegTempCl = ClusterReg(RegTemp, StateFactor)
    tibble(
      AgeRange = arg$AgeRange,
      Haz = arg$Haz,
      LT = arg$LT,
      Coef = RegTemp$coefficients[["After"]],
      SE = RegTempCl["After", "Std. Error"],
      SEUnclustered = coef(summary(RegTemp))["After", "Std. Error"],
      tstat = RegTempCl["After", "t value"],
      pval = RegTempCl["After", "Pr(>|t|)"],
      nobs = nobs(RegTemp),
      nsize = length(unique(dt$id)))
  }
}

RegUptake = bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeFunc))
(RegUptake$Coef[RegUptake$AgeRange=="18-20" & !RegUptake$Haz & RegUptake$LT]*100) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeLPMCoefLT.txt")
(RegUptake$Coef[RegUptake$AgeRange=="18-20" & !RegUptake$Haz & RegUptake$LT]*100/(1-.8^(1/12))) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeLPMCoefLTProp.txt")
((.8 - (1-(1-.8^(1/12)+RegUptake$Coef[RegUptake$AgeRange=="18-20" & !RegUptake$Haz & RegUptake$LT]))^12)*100) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeLPMCoefLTCum.txt")
(RegUptake$Coef[RegUptake$AgeRange=="18-20" & RegUptake$Haz & RegUptake$LT]*100) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeHazCoefLT.txt")

aomSmall =
  aom %>%
  filter(StateCode != "Overseas") %>%
  mutate(AoMDate = ymd(paste(AoMLawChangeYear, AoMLawChangeMonth, AoMLawChangeDay, sep="-"))) %>%
  select(StateNum, StateCode, AoMDate) %>%
  arrange(StateNum)

RegUptakeRIFunc = function(sample=1){
  # Sample new treatment dates
  aomTemp = aomSmall %>%
    mutate(AoMDate = sample(seq.Date(ymd("1970-01-01"), ymd("1979-12-01"), by="months"), 8))

  panel1820 =
    panel1820 %>%
    left_join(aomTemp) %>%
    mutate(After = ifelse(Date>=AoMDate, 1, 0)) %>%
    mutate(
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear))
  panel2123 =
    panel2123 %>%
    left_join(aomTemp) %>%
    mutate(After = ifelse(Date>=AoMDate, 1, 0)) %>%
    mutate(
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear))

  RegTemp =
        coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
                After+BornFactor+StateFactor + cluster(StateFactor), data=panel1820)
  Haz1820 =
    tibble(
      AgeRange = "18-20",
      Haz = TRUE,
      LT = FALSE,
      CoefRI = RegTemp$coefficients[["After"]])
  RegTemp =
        coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
                After+BornFactor+StateFactor*Months + cluster(StateFactor), data=panel1820)
  Haz1820LT =
    tibble(
      AgeRange = "18-20",
      Haz = TRUE,
      LT = TRUE,
      CoefRI = RegTemp$coefficients[["After"]])
  RegTemp = lm(FirstPill~After+BornFactor+StateFactor,data=panel1820)
  LM1820 =
    tibble(
      AgeRange = "18-20",
      Haz = FALSE,
      LT = FALSE,
      CoefRI = RegTemp$coefficients[["After"]])
  RegTemp = lm(FirstPill~After+BornFactor+StateFactor*Months,data=panel1820)
  LM1820LT =
    tibble(
      AgeRange = "18-20",
      Haz = FALSE,
      LT = TRUE,
      CoefRI = RegTemp$coefficients[["After"]])

  RegTemp =
        coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
                After+BornFactor+StateFactor + cluster(StateFactor), data=panel2123)
  Haz2123 =
    tibble(
      AgeRange = "21-23",
      Haz = TRUE,
      LT = FALSE,
      CoefRI = RegTemp$coefficients[["After"]])
  RegTemp =
        coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
                After+BornFactor+StateFactor*Months + cluster(StateFactor), data=panel2123)
  Haz2123LT =
    tibble(
      AgeRange = "21-23",
      Haz = TRUE,
      LT = TRUE,
      CoefRI = RegTemp$coefficients[["After"]])
  RegTemp = lm(FirstPill~After+BornFactor+StateFactor,data=panel2123)
  LM2123 =
    tibble(
      AgeRange = "21-23",
      Haz = FALSE,
      LT = FALSE,
      CoefRI = RegTemp$coefficients[["After"]])
  RegTemp = lm(FirstPill~After+BornFactor+StateFactor*Months,data=panel2123)
  LM2123LT =
    tibble(
      AgeRange = "21-23",
      Haz = FALSE,
      LT = TRUE,
      CoefRI = RegTemp$coefficients[["After"]])

  bind_rows(
    Haz1820,
    Haz1820LT,
    Haz2123,
    Haz2123LT,
    LM1820,
    LM1820LT,
    LM2123,
    LM2123LT
  )
}

nSamples = 300
RegUptakeRI = bind_rows(map(1:nSamples, RegUptakeRIFunc))
save(RegUptakeRI, file="RegUptakeRI.RData")

# # For limited computer resources:
# # Run the next two lines once
# nSamples = 2
# RegUptakeRI = bind_rows(map(1:nSamples, RegUptakeRIFunc))
# # Save the estimates
# save(RegUptakeRI, file="RegUptakeRI.RData")
# # Run the next three lines many times (hundreds) when it is convenient for you
# # Each time, it will load the existing RI estimates, caculate a couple more, add them to the file, and save the estimates
# load(file="RegUptakeRI.RData")
# RegUptakeRI = bind_rows(RegUptakeRI, map(1:nSamples, RegUptakeRIFunc))
# save(RegUptakeRI, file="RegUptakeRI.RData")

load(file="RegUptakeRI.RData")
RegUptakeRIColl = 
  RegUptakeRI %>%
  left_join(RegUptake %>% select(AgeRange, Haz, LT, Coef)) %>%
  group_by(AgeRange, Haz, LT) %>%
  summarise(pRI = sum(abs(CoefRI)>=abs(Coef))/length(Coef))
RegUptake = left_join(RegUptake, RegUptakeRIColl)



###################################
# Separate regressions for each age

AgeValues = 12:34
BornYearMin = 1944
BornYearMax = 1966
writeLines(as.character(BornYearMin), 'RegUptakeByAgeBornYearMin.txt')
writeLines(as.character(BornYearMax), 'RegUptakeByAgeBornYearMax.txt')

RegUptakeInput = crossing(
  Age = AgeValues,
  LT = c(TRUE, FALSE)
) 


RegUptakeByAgeFunc = function(arg){
  dt = panel %>% 
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           BornYear+arg$Age>1968, BornYear+arg$Age<1981) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           StateCode, BornDate, BornYear, After, MarriedBefore) %>%
    na.omit %>%
    group_by(id) %>%
    filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
    ungroup %>%
    mutate(
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear),
      AgeFactor = factor(as.integer(floor(Age))),
      AgeMonthsFactor = factor(AgeMonths),
      Months = interval(min(BornDate), BornDate)%/%months(1))
  
  if(!arg$LT){
    RegTemp = lm(FirstPill~After+BornFactor+StateFactor,data=dt)
  }else{
    RegTemp = lm(FirstPill~After+BornFactor+StateFactor*Months,data=dt)
  }
  RegTempCl = ClusterReg(RegTemp, StateFactor)
  RegTempCI = ClusterCI(RegTemp, StateFactor, level=.9)
  tibble(
    Age = arg$Age,
    Haz = F,
    LT = arg$LT,
    Coef = coef(summary(RegTemp))[str_detect(row.names(coef(summary(RegTemp))), "After"), "Estimate"],
    CI90LB = RegTempCI[str_detect(row.names(RegTempCl), "After"), 1],
    CI90UB = RegTempCI[str_detect(row.names(RegTempCl), "After"), 2],
    SE = RegTempCl[str_detect(row.names(RegTempCl), "After"), "Std. Error"],
    SEUnclustered = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "Std. Error"],
    tstat = RegTempCl[str_detect(row.names(RegTempCl), "After"), "t value"],
    pval = RegTempCl[str_detect(row.names(RegTempCl), "After"), "Pr(>|t|)"],
    nobs = nobs(RegTemp),
    nsize = length(unique(dt$id)))
}

RegUptakeByAge = bind_rows(
  lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeFunc))
RegUptakeByAge = 
  RegUptakeByAge %>%
  mutate(Age1820 = factor(ifelse(Age %in% 18:20, "Yes", "No"), 
                          levels=c("No", "Yes")))

# Randomization inference
# LPM for uptake
DateOptions = seq.Date(ymd("1970-01-01"), ymd("1979-12-01"), by="months")
RegUptakeByAgeRIFunc = function(arg){
  dt = panel %>% 
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           BornYear+arg$Age>1968, BornYear+arg$Age<1981) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           StateCode, BornDate, BornYear) %>%
    na.omit %>%
    group_by(id) %>%
    filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
    ungroup %>%
    left_join(aomSmall %>% mutate(AoMDate = sample(DateOptions, 8)), by="StateCode") %>%
    mutate(
      After = ifelse(Date>=AoMDate, 1, 0),
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear),
      # AgeFactor = factor(as.integer(floor(Age))),
      # AgeMonthsFactor = factor(AgeMonths),
      Months = interval(min(BornDate), BornDate)%/%months(1))
  
  if(!arg$LT){
    RegTemp = lm(FirstPill~After+BornFactor+StateFactor,data=dt)
  }else{
    RegTemp = lm(FirstPill~After+BornFactor+StateFactor*Months,data=dt)
  }
  tibble(
    Age = arg$Age,
    # Haz = F,
    LT = arg$LT,
    CoefRI = coef(summary(RegTemp))[str_detect(row.names(coef(summary(RegTemp))), "After"), "Estimate"])
}
RegUptakeByAgeRIRepFunc = function(sample){
  bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeRIFunc))
}
nSamples = 300
RegUptakeByAgeRI = bind_rows(map(1:nSamples, RegUptakeByAgeRIRepFunc))
# RegUptakeByAgeRI = bind_rows(RegUptakeByAgeRI, map(1:nSamples, RegUptakeByAgeRIRepFunc))
save(RegUptakeByAgeRI, file="RegUptakeByAgeRI.RData")

load("RegUptakeByAgeRI.RData")
RegUptakeByAgeRICollapse =
  RegUptakeByAgeRI %>%
  left_join(RegUptakeByAge %>% select(Age, LT, Coef)) %>%
  group_by(Age, LT) %>%
  summarise(pRI = mean(abs(CoefRI) > abs(Coef)),
            nRepsRI = length(CoefRI))

RegUptakeByAge = 
  RegUptakeByAge %>%
  left_join(RegUptakeByAgeRICollapse)

Coef19 = RegUptakeByAge$Coef[RegUptakeByAge$Age==19 & RegUptakeByAge$LT]
(Coef19*100) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeLPMCoef19LT.txt")
(Coef19*100/(1-.8^(1/12))) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeLPMCoef19LTProp.txt")
((.8 - (1-(1-.8^(1/12)+Coef19))^12)*100) %>%
  prettyNum(digits=3) %>%
  writeLines("RegUptakeLPMCoef19LTCum.txt")

# Plot LPM coefs on uptake by age
RegUptakeByAgeCoef24 = 
  ggplot(RegUptakeByAge %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=100*Coef, ymin=100*CI90LB, ymax=100*CI90UB)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype="dashed", color="grey50") +
  geom_linerange(size=.5, color="black") +
  geom_point(size=2) +
  scale_y_continuous("% point increase in pill uptake")+
  scale_x_continuous("Age", breaks=seq(12,34,1)) + 
  # coord_cartesian(ylim=c(-2.5, 2.5))+
  theme(panel.grid.minor = element_blank(),
        plot.margin = unit(c(1,0,.1,.1), "cm"),
        panel.grid.major.x = element_blank(),
        legend.position = "none")

# Plot sample size for LPM of uptake by age
RegUptakeByAgeN24 = 
  ggplot(RegUptakeByAge %>%
           filter(LT==T, Age>=16, Age<=25),
         aes(x=Age, y=nsize)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype=2, color="grey50") +
  geom_point(size=2) +
  scale_y_continuous("Women at risk")+
  scale_x_continuous("", breaks=seq(12,34,1)) +
  theme(panel.grid.minor = element_blank(),
        plot.margin = unit(c(0,0,0,.1), "cm"),
        panel.grid.major.x = element_blank(),
        legend.position = "none")

# Plot RI p-values for LPM coefs on uptake by age
RegUptakeByAgeP24 =
  ggplot(RegUptakeByAge %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=pRI)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_hline(yintercept=1, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype=2, color="grey50") +
  geom_point(size=2) +
  scale_y_continuous("P-values", breaks=seq(0,1,.2))+
  scale_x_continuous("", breaks=seq(12,34,1)) +
  coord_cartesian(ylim=c(0,1))+
  theme(panel.grid.minor = element_blank(),
        plot.margin = unit(c(0,0,0,.1), "cm"),
        panel.grid.major.x = element_blank(),
        legend.position = "none")

pdf("RegUptakeByAgeMulti25.pdf", width=6.5, height=7.2)
grid.newpage()
grid.draw(rbind(ggplotGrob(RegUptakeByAgeCoef24), 
                ggplotGrob(RegUptakeByAgeP24), 
                ggplotGrob(RegUptakeByAgeN24),
                size = "last"))
dev.off()



# Plots for LPM without state-specific linear trends
# Plot LPM coefs on uptake by age
RegUptakeByAgeCoef_LTF = 
  ggplot(RegUptakeByAge %>%
           filter(Haz==F, LT==F, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=100*Coef, ymin=100*CI90LB, ymax=100*CI90UB)) +
  geom_hline(yintercept=0, color="grey50") +
  geom_vline(xintercept=18, linetype=2, color="grey50") +
  geom_vline(xintercept=21, linetype=2, color="grey50") +
  geom_ribbon(alpha=.1, linetype=0) +
  geom_line(linetype=2) +
  geom_point() +
  scale_y_continuous("% point increase in pill uptake") 
# coord_cartesian(ylim=c(-2.5, 2.5))
ggsave("RegUptakeByAge_LTF.pdf", plot=RegUptakeByAgeCoef_LTF, width=6.5, height=4)

# Plot sample size for LPM of uptake by age
RegUptakeByAgeN_LTF = 
  ggplot(RegUptakeByAge %>%
           filter(Haz==F, LT==F, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=nsize)) +
  geom_hline(yintercept=0, color="grey50") +
  geom_hline(yintercept=1, color="grey50") +
  geom_vline(xintercept=18, linetype=2, color="grey50") +
  geom_vline(xintercept=21, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point() +
  scale_y_continuous("Women in sample at risk")+
  scale_x_continuous("")
# coord_cartesian(ylim=c(0,1))
ggsave("RegUptakeByAgeN_LTF.pdf", plot=RegUptakeByAgeN_LTF, width=6.5, height=2)

# Plot RI p-values for LPM coefs on uptake by age
RegUptakeByAgeP_LTF =
  ggplot(RegUptakeByAge %>%
           filter(Haz==F, LT==F, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=pRI)) +
  geom_hline(yintercept=0, color="grey50") +
  geom_hline(yintercept=1, color="grey50") +
  geom_vline(xintercept=18, linetype=2, color="grey50") +
  geom_vline(xintercept=21, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point() +
  scale_y_continuous("P-values", breaks=seq(0,1,.2))+
  scale_x_continuous("") +
  coord_cartesian(ylim=c(0,1))
ggsave("RegUptakeByAgeP_LTF.pdf", plot=RegUptakeByAgeP_LTF, width=6.5, height=2)

pdf("RegUptakeByAgeMulti_LTF.pdf", width=6.5, height=7.2)
grid.newpage()
grid.draw(rbind(ggplotGrob(RegUptakeByAgeCoef_LTF), 
                ggplotGrob(RegUptakeByAgeP_LTF), 
                ggplotGrob(RegUptakeByAgeN_LTF),
                size = "last"))
dev.off()


##################
# Hazard of uptake by age

AgeValues = 14:34
RegUptakeInput = crossing(
  Age = AgeValues,
  LT = c(TRUE, FALSE)
) 

RegUptakeByAgeFunc = function(arg){
  dt = panel %>% 
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           BornYear+arg$Age>1968, BornYear+arg$Age<1981) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           StateCode, BornDate, BornYear, After) %>%
    na.omit %>%
    group_by(id) %>%
    filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
    ungroup %>%
    mutate(
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear),
      AgeFactor = factor(as.integer(floor(Age))),
      AgeMonthsFactor = factor(AgeMonths),
      Months = interval(min(BornDate), BornDate)%/%months(1))
  
  if(!arg$LT){
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor + cluster(StateCode), data=dt)
    # RegTemp = lm(FirstPill~After+BornFactor+StateFactor+AgeMonthsFactor,data=dt)
  }else{
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor*Months + cluster(StateCode), data=dt)
  }
  tibble(
    Age = arg$Age,
    Haz = T,
    LT = arg$LT,
    Coef = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "coef"],
    SE = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "robust se"],
    SEUnclustered = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "se(coef)"],
    z = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "z"],
    pval = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "Pr(>|z|)"],
    nobs = length(dt$id),
    nsize = length(unique(dt$id)))
}

RegUptakeByAgeHaz = bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeFunc)) %>%
  mutate(CI90LB = Coef-qnorm(.95)*pmax(SE, SEUnclustered),
         CI90UB = Coef+qnorm(.95)*pmax(SE, SEUnclustered))


# Randomization inference
# Hazard of uptake
DateOptions = seq.Date(ymd("1970-01-01"), ymd("1979-12-01"), by="months")
RegUptakeByAgeRIFunc = function(arg){
  dt = panel %>% 
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           BornYear+arg$Age>1968, BornYear+arg$Age<1981) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           StateCode, BornDate, BornYear) %>%
    na.omit %>%
    group_by(id) %>%
    filter(max(ifelse(StateCode=="Overseas", 1, 0))==0) %>%
    ungroup %>%
    left_join(aomSmall %>% mutate(AoMDate = sample(DateOptions, 8)), by="StateCode") %>%
    mutate(
      After = ifelse(Date>=AoMDate, 1, 0),
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear),
      # AgeFactor = factor(as.integer(floor(Age))),
      # AgeMonthsFactor = factor(AgeMonths),
      Months = interval(min(BornDate), BornDate)%/%months(1))
  
  if(!arg$LT){
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor + cluster(StateCode), data=dt)
  }else{
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor*Months + cluster(StateCode), data=dt)
  }
  tibble(
    Age = arg$Age,
    # Haz = F,
    LT = arg$LT,
    CoefRI = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "coef"])
}
RegUptakeByAgeRIRepFunc = function(sample){
  bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeRIFunc))
}
nSamples = 300
RegUptakeByAgeHazRI = bind_rows(map(1:nSamples, RegUptakeByAgeRIRepFunc))
# RegUptakeByAgeHazRI = bind_rows(RegUptakeByAgeHazRI, map(1:nSamples, RegUptakeByAgeRIRepFunc))
save(RegUptakeByAgeHazRI, file="RegUptakeByAgeHazRI.RData")

load("RegUptakeByAgeHazRI.RData")
RegUptakeByAgeHazRICollapse =
  RegUptakeByAgeHazRI %>%
  left_join(RegUptakeByAgeHaz %>% select(Age, LT, Coef)) %>%
  group_by(Age, LT) %>%
  summarise(pRI = mean(abs(CoefRI) > abs(Coef)),
            nRepsRI = length(CoefRI))

RegUptakeByAgeHaz = 
  RegUptakeByAgeHaz %>%
  left_join(RegUptakeByAgeHazRICollapse) %>%
  mutate(Age1820 = factor(ifelse(Age %in% 18:20, "Yes", "No"), 
                          levels=c("No", "Yes")))

# Ages 16-24
RegUptakeByAgeHazCoef24 = 
  ggplot(RegUptakeByAgeHaz %>%
           filter(LT==T, Age>=16, Age<=25),
         aes(x=Age, y=Coef, ymin=CI90LB, ymax=CI90UB)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  # geom_errorbar(size=.2, color="grey30") +
  geom_line(linetype="dashed", color="grey50") +
  geom_linerange(size=.5, color="black") +
  geom_point(size=2) +
  scale_y_continuous("% increase in pill uptake", labels=scales::percent) +
  scale_x_continuous("Age", breaks=seq(12,34,1)) +
  coord_cartesian(ylim=c(-2.0, 1.5)) + 
  # coord_cartesian(ylim=c(-2.5, 2.5))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(1,0,.1,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

# Plot sample size for hazard of uptake by age
RegUptakeByAgeHazN24 = 
  ggplot(RegUptakeByAgeHaz %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=nsize)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype="dashed", color="grey50") +
  geom_point(size=2) +
  scale_y_continuous("Women at risk") +
  scale_x_continuous("", breaks=seq(12,34,1)) +
  # coord_cartesian(ylim=c(-100, 200))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(0,0,0,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

# Plot RI p-values for hazard coefs on uptake by age
RegUptakeByAgeHazP24 = 
  ggplot(RegUptakeByAgeHaz %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=pRI)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_hline(yintercept=1, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype="dashed", color="grey50") +
  geom_point(size=2) +
  scale_y_continuous("P-values", breaks=seq(0,1,.2)) +
  scale_x_continuous("", breaks=seq(12,34,1)) +
  coord_cartesian(ylim=c(0,1)) +
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(0,0,0,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

pdf("RegUptakeByAgeHazMulti25.pdf", width=6.5, height=7.2)
grid.newpage()
grid.draw(rbind(ggplotGrob(RegUptakeByAgeHazCoef24), 
                ggplotGrob(RegUptakeByAgeHazP24), 
                ggplotGrob(RegUptakeByAgeHazN24),
                size = "last"))
dev.off()



######################################
######################################
# NFS 

################################
# Descriptive tables

StateCodesInNFS = unique(nfs$StateCode)
StatesInNFS = unique(nfs$State)
ExcludedStatesNFS = state.name[!(state.abb %in% StateCodesInNFS)]

paste(ExcludedStatesNFS[1:(length(ExcludedStatesNFS)-1)], ", ", sep="", collapse="") %>%
  writeLines("ExcludedStatesNFS1.tex")
ExcludedStatesNFS[length(ExcludedStatesNFS)] %>%
  writeLines("ExcludedStatesNFS2.tex")

LawTableDateFunc = function(date){
  ifelse(date<=ymd("1960-06-23"), "Before pill", 
         ifelse(date>=ymd("1971-04-01"), "After NFS", as.character(date)))
}
ELA18Table =
  ela18wide %>%
  select(State, FAoMY_LPY_FPN) %>%
  filter(State %in% StatesInNFS) %>%
  arrange(State) %>%
  mutate_at(vars(-State), LawTableDateFunc) %>% 
  rename(`ELA at age 18` = FAoMY_LPY_FPN)
ELA19Table =
  ela19wide %>%
  select(State, FAoMY_LPY_FPN) %>%
  filter(State %in% StatesInNFS) %>%
  arrange(State) %>%
  mutate_at(vars(-State), LawTableDateFunc) %>% 
  rename(`ELA at age 19` = FAoMY_LPY_FPN)
ELA20Table =
  ela20wide %>%
  select(State, FAoMY_LPY_FPN) %>%
  filter(State %in% StatesInNFS) %>%
  arrange(State) %>%
  mutate_at(vars(-State), LawTableDateFunc) %>% 
  rename(`ELA at age 20` = FAoMY_LPY_FPN)

ELATable =
  ELA18Table %>%
  full_join(ELA19Table) %>%
  full_join(ELA20Table)

ELAAbTable = 
  ELATable %>% 
  full_join(
    AbUS %>%
      filter(State %in% StatesInNFS) %>%
      mutate_at(vars(AbDate), LawTableDateFunc) %>%
      # filter(AbDate<ymd("1973-01-01")) %>%
      # arrange(AbDate) %>%
      rename(`Abortion legal` = AbDate)
  ) %>%
  filter_at(vars(-State), any_vars(.!="After NFS" & .!="Before pill"))

# CellGreyFunc = function(arg){
#   cell_spec(arg, "latex", color = ifelse(arg =="After NFS" | arg =="Before pill", "gray", "black"))
# }
ELAAbTable %>%
  # mutate_at(vars(-State), CellGreyFunc) %>%
  kable(
    format="latex",
    # col.names = c("State", "Date"), 
    linesep = "",
    # booktabs=T,
    # caption='Dates of ELA and when abortion was first legal (for most purposes) and confidentially available to 19-year-olds by US state. The table includes only the states where laws changed after June 1960 and before April 1971.',
    # label='ELAAbTable',
    escape = FALSE) %>%
  kable_styling(latex_table_env="") %>%
  writeLines("ELAAbTable.tex")

# ELAAbTable %>% stargazer(summary=F)



#####################
# Descriptive figures

CountsByBornYearNFS = 
  nfs %>% 
  group_by(BornYear) %>%
  summarize(Count=length(id)) %>%
  ungroup %>%
  filter(BornYear<1954) %>%
  mutate(Sample = "NFS")
CountsByBornYearNFS %>%
  ggplot(aes(x=BornYear, y=Count))+
  geom_line() +
  scale_x_continuous("Year Born") +
  annotate("line", x=c(1942, 1942), y=c(110,320), color="grey50", linetype="dashed") +
  annotate("line", x=c(1948, 1948), y=c(110,320), color="grey50", linetype="dashed") +
  annotate("text", x=1942, y=100, label="Turn 18", size=2.5) +
  annotate("text", x=1942, y=90, label="in 1960", size=2.5) +
  annotate("text", x=1948, y=100, label="Turn 21", size=2.5) +
  annotate("text", x=1948, y=90, label="in 1969", size=2.5) 
ggsave("CountsByBornYearNFS.pdf", width=6.5, height=3.5)


######################
# Time series evidence

# Prop using pill as function of time since ELA
PillPropByMoSinceAoMNFS %>% 
ggplot(aes(x=MonthsSinceAoM, y=PillProp, ymin=RibbonLower, ymax=RibbonUpper))+
  geom_vline(xintercept = 0, linetype="dashed", color="grey50")+ 
  geom_line(size=1)+
  geom_line(aes(x=MonthsSinceAoM, y=RibbonLower), 
            color="grey50", size=.4)+
  geom_line(aes(x=MonthsSinceAoM, y=RibbonUpper), 
            color="grey50", size=.4)+
  scale_y_continuous("Prop. using the pill")+
  scale_x_continuous("Years since ELA at age 19", limits=c(-5*12, 5*12), 
                     breaks=seq(-5*12, 5*12, 12), labels=seq(-5, 5, 1))+
  coord_cartesian(ylim = c(0,.6)) +
  theme(plot.margin = unit(c(1,0,0,.1), "cm"))
ggsave("PillPropByMoSinceAoMNFS.pdf", width=6.5, height=2.3)


# Prop using before age 21 by age in year after ELA
PillByAgeAfterELA = 
  crossing(id=unique(nfs$id), ELAcode=ELAcodes) %>%
  left_join(nfs %>% 
              select(id, BornYear, BornDate, State, StateCode, PillBefore17:PillBefore22), 
            by=c("id")) %>%
  left_join(ela, by=c("State", "ELAcode")) %>%
  left_join(AbUS, by="State") %>%
  mutate(AgeInYearAfterAoM = round(
    interval(floor_date(BornDate, unit="months"), 
             floor_date(ELA19Date, unit="months")
    )%/% months(1) / 12)) %>%
  filter(AgeInYearAfterAoM<30, AgeInYearAfterAoM>10) %>%
  group_by(AgeInYearAfterAoM, ELAcode) %>%
  summarise(PillBefore17 = mean(PillBefore17, na.rm=T),
            PillBefore18 = mean(PillBefore18, na.rm=T),
            PillBefore19 = mean(PillBefore19, na.rm=T),
            PillBefore20 = mean(PillBefore20, na.rm=T),
            PillBefore21 = mean(PillBefore21, na.rm=T),
            PillBefore22 = mean(PillBefore22, na.rm=T),
            nSize = length(ELAcode)) %>%
  ungroup

PillByAgeAfterELA %>% 
  filter(ELAcode=="FAoMY_LPY_FPN") %>%
ggplot(aes(x=AgeInYearAfterAoM, y=PillBefore21)) +
  geom_vline(xintercept=20, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point()+
  scale_x_reverse("Mode of age in the year following ELA",
                  breaks=seq(10, 30, 2)) +
  scale_y_continuous("Proportion starting the pill before age 21")+
  annotate("text", x=16.5, y=.605, label="At least partially treated")+
  geom_segment(aes(x = 20, y = .58, xend = 11, yend = .58), colour='grey50', size=.5, arrow = arrow(length = unit(0.3, "cm")))
ggsave("PillBeforeAgeTSNFS.pdf",width=6.5, height=4)


ggplot(PillByAgeAfterAoM, aes(x=AgeInYearAfterAoM, y=PillBefore21)) +
  geom_vline(xintercept=20, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point()+
  scale_x_reverse("Mode of age in the year following ELA",
                  breaks=seq(10, 30, 2)) +
  scale_y_continuous("Proportion starting the pill before age 21")+
  annotate("text", x=16.5, y=.38, label="At least partially treated")+
  geom_segment(aes(x = 20, y = .355, xend = 11, yend = .355), colour='grey50', size=.5, arrow = arrow(length = unit(0.3, "cm")))
ggsave("PillBeforeAgeTS.pdf", width = 6.5, height=4)

PillByAgeAfterELA %>% 
  filter(ELAcode=="FAoMY_LPY_FPN") %>%
  # filter(ELAcode!="BHM2012", str_detect(ELAcode, "LPY")) %>%
  ggplot(aes(x=AgeInYearAfterAoM, y=nSize)) +
  geom_vline(xintercept=20, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point()+
  scale_x_reverse("Mode of age in the year following AoM",
                  breaks=seq(10, 30, 2)) +
  scale_y_continuous("Proportion starting the pill before age 21")
ggsave("PillBeforeAgeTSNFS_N.pdf",width=6.5, height=4)


# Initial uptake
PillStartByYrSinceELA = 
  nfspanel %>%
  filter(18<=Age & Age<21 & Date>=ymd("1960-01-01")) %>%
  # Drop post-Pill obervations
  filter(Age<AgeAtFirstPillCensored+(1/100))%>%
  select(id, Date, StateCode, State, BornDate, BornYear, Pill) %>%
  left_join(ela19wide, by="State") %>%
  gather(key=ELAcode, value=ELADate, BHM2012:FAoMN_LPN_FPN_KY68) %>%
  filter(ELADate>ymd("1960-06-23"), ELADate<ymd("1971-04-01")) %>%
  mutate(MonthsSinceAoM = interval(floor_date(ELADate, unit="months")-months(1), Date)%/%months(1),
         YearsSinceAoM = ceiling(MonthsSinceAoM/12)-1)%>%
  group_by(YearsSinceAoM, ELAcode) %>% 
  summarise(PillProp = mean(Pill, na.rm=T)*12,
            PillCount = sum(Pill, na.rm=T),
            Count1820 = pmax(round(length(id)/12),1)) %>%
  ungroup %>%
  mutate(RibbonLower = qbinom(.025, Count1820, PillProp)/Count1820,
         RibbonUpper = qbinom(.975, Count1820, PillProp)/Count1820)

PillStartByYrSinceELA %>% 
  filter(ELAcode=="FAoMY_LPY_FPN") %>%
  # filter(!str_detect(ELAcode, "KY68") & ELAcode!="AFP")
ggplot(aes(x=YearsSinceAoM, y=PillProp, ymin=RibbonLower, ymax=RibbonUpper))+
  geom_vline(xintercept = 0, linetype="dashed", color="grey50")+ 
  # geom_ribbon(alpha=.1, linetype=0)+
  geom_line(linetype="dashed")+
  # geom_linerange()+
  geom_line(aes(x=YearsSinceAoM, y=RibbonLower), linetype="dashed", color="grey50")+
  geom_line(aes(x=YearsSinceAoM, y=RibbonUpper), linetype="dashed", color="grey50")+
  geom_point()+
  scale_y_continuous("Prop. of at-risk women aged 18-20")+
  scale_x_continuous("Years since ELA at age 19", breaks = seq(-12,12,2), limits=c(-7,5))+
  coord_cartesian(ylim = c(0,.8)) +
  theme(axis.title.y = element_text(size=rel(.8)))
ggsave("PillStartByYrSinceELA.pdf", width=6.5, height=3)
# ggsave(filename = "plots/PillStartByYrSinceELA19NFS.png", 
#        type="cairo-png", dpi="retina", width=8, height=3.5)



##############################
# Uptake
AgeValues = 12:34
RegUptakeInput = crossing(
  Age = AgeValues,
  LT = c(TRUE, FALSE)
) 

RegUptakeByAgeFuncNFS = function(arg){
  dt = 
    nfspanel %>%
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           Date>ymd("1960-06-23"), 
           !is.na(State)) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           State, StateCode, BornDate, BornYear) %>%
    na.omit %>%
    left_join(ela %>% filter(ELAcode=="FAoMY_LPY_FPN"), by=c("State")) %>%
    left_join(AbUS, by="State") %>%
    mutate(
      After = ifelse(Date>=round_date(ELA19Date, unit="months"), 1, 0),
      AbLegal = ifelse(Date>=round_date(AbDate, unit="months"), 1, 0),
      StateFactor = factor(StateCode),
      BornFactor = factor(BornYear),
      AgeFactor = factor(as.integer(floor(Age))),
      AgeMonthsFactor = factor(AgeMonths),
      Months = interval(min(BornDate), BornDate)%/%months(1))
  
  if(!arg$LT){
    RegTemp = lm(FirstPill~After+AbLegal+BornFactor+StateFactor,data=dt)
  }else{
    RegTemp = lm(FirstPill~After+AbLegal+BornFactor+StateFactor*Months,data=dt)
  }
  RegTempCl = ClusterReg(RegTemp, StateFactor)
  RegTempCI = ClusterCI(RegTemp, StateFactor, level=.9)
  tibble(
    Age = arg$Age,
    Haz = F,
    LT = arg$LT,
    Coef = coef(summary(RegTemp))[str_detect(row.names(coef(summary(RegTemp))), "After"), "Estimate"],
    CI90LB = RegTempCI[str_detect(row.names(RegTempCl), "After"), 1],
    CI90UB = RegTempCI[str_detect(row.names(RegTempCl), "After"), 2],
    SE = RegTempCl[str_detect(row.names(RegTempCl), "After"), "Std. Error"],
    SEUnclustered = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "Std. Error"],
    tstat = RegTempCl[str_detect(row.names(RegTempCl), "After"), "t value"],
    pval = RegTempCl[str_detect(row.names(RegTempCl), "After"), "Pr(>|t|)"],
    nobs = nobs(RegTemp),
    nsize = length(unique(dt$id)))
}

RegUptakeByAgeNFS = bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeFuncNFS))


# Randomization inference
# LPM for uptake
elasmall = ela %>% 
  filter(ELAcode=="FAoMY_LPY_FPN") %>%
  select(State, ELA19Date)
# DateOptions = seq.Date(ymd("1960-06-01"), ymd("1971-03-01"), by="months")
RegUptakeByAgeRIFuncNFS = function(arg){
  dt = 
    nfspanel %>%
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           Date>ymd("1960-06-23"), 
           !is.na(State)) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           State, StateCode, BornDate, BornYear) %>%
    na.omit %>%
    left_join(elasmall %>% mutate(ELA19Date = sample(ELA19Date, 51)), by="State") %>%
    # left_join(ela %>% filter(ELAcode=="FAoMY_LPY_FPN"), by=c("State")) %>%
    left_join(AbUS, by="State") %>%
    mutate(After = ifelse(Date>=round_date(ELA19Date, unit="months"), 1, 0),
           AbLegal = ifelse(Date>=round_date(AbDate, unit="months"), 1, 0),
           StateFactor = factor(StateCode),
           BornFactor = factor(BornYear),
           # AgeFactor = factor(as.integer(floor(Age))),
           # AgeMonthsFactor = factor(AgeMonths),
           Months = interval(min(BornDate), BornDate)%/%months(1))

  if(!arg$LT){
    RegTemp = lm(FirstPill~After+AbLegal+BornFactor+StateFactor,data=dt)
    # RegTemp = lm(FirstPill~After+BornFactor+StateFactor+AgeMonthsFactor,data=dt)
  }else{
    RegTemp = lm(FirstPill~After+AbLegal+BornFactor+StateFactor*Months,data=dt)
  }
  tibble(
    Age = arg$Age,
    # Haz = F,
    LT = arg$LT,
    CoefRI = coef(summary(RegTemp))[str_detect(row.names(coef(summary(RegTemp))), "After"), "Estimate"])
}
RegUptakeByAgeRIRepFuncNFS = function(sample){
  bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeRIFuncNFS))
}
nSamples = 300
RegUptakeByAgeNFSRI = bind_rows(map(1:nSamples, RegUptakeByAgeRIRepFuncNFS))
# RegUptakeByAgeNFSRI = bind_rows(RegUptakeByAgeNFSRI, map(1:nSamples, RegUptakeByAgeRIRepFuncNFS))
save(RegUptakeByAgeNFSRI, file="RegUptakeByAgeNFSRI.RData")

load("RegUptakeByAgeNFSRI.RData")
# ggplot(RegUptakeByAgeNFSRI %>% filter(LT==T, Age==19), 
#        aes(x=CoefRI))+
#   geom_histogram(alpha=.3, bins=40)
RegUptakeByAgeNFSRICollapse =
  RegUptakeByAgeNFSRI %>%
  left_join(RegUptakeByAgeNFS %>% select(Age, LT, Coef)) %>%
  group_by(Age, LT) %>%
  summarise(pRI = mean(abs(CoefRI) > abs(Coef)),
            nRepsRI = length(CoefRI))

RegUptakeByAgeNFS = 
  RegUptakeByAgeNFS %>%
  left_join(RegUptakeByAgeNFSRICollapse)
RegUptakeByAgeNFS = 
  RegUptakeByAgeNFS %>%
  mutate(Age1820 = factor(ifelse(Age %in% 18:20, "Yes", "No"), levels=c("No","Yes")))


# Plot LPM coefs on uptake by age
RegUptakeByAgeNFSCoef24 = 
  ggplot(RegUptakeByAgeNFS %>%
           filter(Haz==F, LT==T, Age>=16, Age<=24),
         # filter(Age<26, Age>14),
         aes(x=Age, y=100*Coef, ymin=100*CI90LB, ymax=100*CI90UB)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype="dashed", color="grey50") +
  geom_linerange(size=.5, color="black") +
  geom_point(size=2) +
  scale_y_continuous("% point increase in pill uptake") +
  scale_x_continuous("Age", breaks=12:34)+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(1,0,0,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank())

# Plot sample size for LPM of uptake by age
RegUptakeByAgeNFSN24 = 
  ggplot(RegUptakeByAgeNFS %>%
           filter(Haz==F, LT==T, Age>=16, Age<=24),
         # filter(Age<26, Age>14),
         aes(x=Age, y=nsize)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype="dashed", color="grey50") +
  geom_point(size=2) +
  scale_y_continuous("Women at risk")+
  scale_x_continuous("", breaks=12:32)+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(0,0,0,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank())

# Plot RI p-values for LPM coefs on uptake by age
RegUptakeByAgeNFSP24 =
  ggplot(RegUptakeByAgeNFS %>%
           filter(Haz==F, LT==T, Age>=16, Age<=24),
         # filter(Age<26, Age>14),
         aes(x=Age, y=pRI)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_hline(yintercept=1, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  geom_line(linetype="dashed", color="grey50") +
  geom_point(size=2) +
  scale_y_continuous("P-values", breaks=seq(0,1,.2))+
  scale_x_continuous("", breaks=12:32) +
  coord_cartesian(ylim=c(0,1))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(0,0,0,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank())

pdf("RegUptakeByAgeNFSMulti25.pdf", width=6.5, height=7.2)
grid.newpage()
grid.draw(rbind(ggplotGrob(RegUptakeByAgeNFSCoef24), 
                ggplotGrob(RegUptakeByAgeNFSP24), 
                ggplotGrob(RegUptakeByAgeNFSN24),
                size = "last"))
dev.off()



##################
# Hazard of uptake by age

RegUptakeByAgeFuncNFS = function(arg){
  dt = 
    nfspanel %>%
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           Date>ymd("1960-06-23"), 
           !is.na(State)) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           State, StateCode, BornDate, BornYear) %>%
    na.omit %>%
    left_join(ela %>% filter(ELAcode=="FAoMY_LPY_FPN"), by=c("State")) %>%
    left_join(AbUS, by="State") %>%
    mutate(After = ifelse(Age<19 & Date>=round_date(ELA18Date, unit="months"), 1,
                          ifelse(Age<20 & Date>=round_date(ELA19Date, unit="months"), 1,
                                 ifelse(Date>=round_date(ELA20Date, unit="months"), 1, 0))),
           AbLegal = ifelse(Date>=round_date(AbDate, unit="months"), 1, 0),
           StateFactor = factor(StateCode),
           BornFactor = factor(BornYear),
           AgeFactor = factor(as.integer(floor(Age))),
           AgeMonthsFactor = factor(AgeMonths),
           Months = interval(min(BornDate), BornDate)%/%months(1))

  if(!arg$LT){
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor + cluster(StateCode), data=dt)
    # RegTemp = lm(FirstPill~After+BornFactor+StateFactor+AgeMonthsFactor,data=dt)
  }else{
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor*Months + cluster(StateCode), data=dt)
  }
  tibble(
    Age = arg$Age,
    Haz = T,
    LT = arg$LT,
    Coef = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "coef"],
    SE = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "robust se"],
    SEUnclustered = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "se(coef)"],
    z = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "z"],
    pval = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "Pr(>|z|)"],
    nobs = length(dt$id),
    nsize = length(unique(dt$id)))
}

AgeValues = 14:34
RegUptakeInput = crossing(
  Age = AgeValues,
  LT = c(TRUE, FALSE)
  # Haz = c(TRUE, FALSE)
) 
RegUptakeByAgeHazNFS = 
  bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeFuncNFS)) %>%
  mutate(CI90LB = Coef-qnorm(.95)*pmax(SE, SEUnclustered),
         CI90UB = Coef+qnorm(.95)*pmax(SE, SEUnclustered))
RegUptakeByAgeHazNFS = 
  RegUptakeByAgeHazNFS %>%
  mutate(Age1820 = factor(ifelse(Age %in% 18:20, "Yes", "No"), 
                          levels=c("No", "Yes")))

# Randomization inference
RegUptakeByAgeRIFuncNFS = function(arg){
  dt = 
    nfspanel %>%
    filter(Age<AgeAtFirstPillCensored+(1/100),
           floor(Age) == arg$Age, 
           Date>ymd("1960-06-23"), 
           !is.na(State)) %>%
    select(id, Date, AgeMonthsStart, AgeMonths, Age, FirstPill, 
           State, StateCode, BornDate, BornYear) %>%
    na.omit %>%
    left_join(elasmall %>% mutate(ELA19Date = sample(ELA19Date, 51)), by="State") %>%
    # left_join(ela %>% filter(ELAcode=="FAoMY_LPY_FPN"), by=c("State")) %>%
    left_join(AbUS, by="State") %>%
    mutate(After = ifelse(Date>=round_date(ELA19Date, unit="months"), 1, 0),
           AbLegal = ifelse(Date>=round_date(AbDate, unit="months"), 1, 0),
           StateFactor = factor(StateCode),
           BornFactor = factor(BornYear),
           # AgeFactor = factor(as.integer(floor(Age))),
           # AgeMonthsFactor = factor(AgeMonths),
           Months = interval(min(BornDate), BornDate)%/%months(1))
  
  if(!arg$LT){
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor + cluster(StateCode), data=dt)
  }else{
    RegTemp =
      coxph(Surv(AgeMonthsStart, AgeMonths, FirstPill)~
              After+BornFactor+StateFactor*Months + cluster(StateCode), data=dt)
  }
  tibble(
    Age = arg$Age,
    # Haz = F,
    LT = arg$LT,
    CoefRI = coef(summary(RegTemp))[
      str_detect(row.names(coef(summary(RegTemp))), "After"), "coef"])
}
RegUptakeByAgeRIRepFuncNFS = function(sample){
  bind_rows(lapply(split(RegUptakeInput, seq(nrow(RegUptakeInput))), RegUptakeByAgeRIFuncNFS))
}
nSamples = 300
RegUptakeByAgeHazNFSRI = bind_rows(map(1:nSamples, RegUptakeByAgeRIRepFuncNFS))
# RegUptakeByAgeHazNFSRI = bind_rows(RegUptakeByAgeHazNFSRI, map(1:nSamples, RegUptakeByAgeRIRepFuncNFS))
save(RegUptakeByAgeHazNFSRI, file="RegUptakeByAgeHazNFSRI.RData")

load("RegUptakeByAgeHazNFSRI.RData")
# ggplot(RegUptakeByAgeRI %>% filter(LT==T, Age==19), 
#        aes(x=CoefRI))+
#   geom_histogram(alpha=.3, bins=40)
RegUptakeByAgeHazNFSRICollapse =
  RegUptakeByAgeHazNFSRI %>%
  left_join(RegUptakeByAgeHazNFS %>% select(Age, LT, Coef)) %>%
  group_by(Age, LT) %>%
  summarise(pRI = mean(abs(CoefRI) > abs(Coef)),
            nRepsRI = length(CoefRI))

RegUptakeByAgeHazNFS = 
  RegUptakeByAgeHazNFS %>%
  left_join(RegUptakeByAgeHazNFSRICollapse)


# Ages 16-24
RegUptakeByAgeHazCoef24 = 
  ggplot(RegUptakeByAgeHaz %>%
           filter(LT==T, Age>=16, Age<=25),
         aes(x=Age, y=Coef, ymin=CI90LB, ymax=CI90UB)) +
  geom_hline(yintercept=0, color="grey70") +
  geom_vline(xintercept=17.5, linetype=2, color="grey70") +
  geom_vline(xintercept=20.5, linetype=2, color="grey70") +
  # geom_errorbar(size=.2, color="grey30") +
  geom_line(linetype="dashed", color="grey50") +
  geom_linerange(size=4, alpha=.1, color="black") +
  geom_point(mapping=aes(shape=Age1820, color=Age1820), size=2.5) +
  scale_y_continuous("% increase in pill uptake", labels=scales::percent) +
  scale_x_continuous("Age", breaks=seq(12,34,1)) +
  coord_cartesian(ylim=c(-2.0, 1.5)) + 
  # coord_cartesian(ylim=c(-2.5, 2.5))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(1,0,.1,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))+
  Color2+
  Shape2

# Age 16-25
# Plot hazard on uptake coefs by age
RegUptakeByAgeHazNFSCoef25 = 
  ggplot(RegUptakeByAgeHazNFS %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=Coef, ymin=CI90LB, ymax=CI90UB)) +
  geom_hline(yintercept=0, color="grey50") +
  geom_vline(xintercept=17.5, linetype=2, color="grey50") +
  geom_vline(xintercept=20.5, linetype=2, color="grey50") +
  geom_line(linetype="dashed", color="grey50") +
  geom_linerange(size=.5, color="black") +
  geom_point(size=2) +
  scale_y_continuous("% increase in pill uptake", labels=scales::percent, breaks=-4:4) +
  scale_x_continuous("Age", breaks= seq(12,32,1))+
  coord_cartesian(ylim=c(-2.50, 3.50))+ 
  # coord_cartesian(ylim=c(-2.5, 2.5))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(1,0,.1,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

# Plot sample size for hazard of uptake by age
RegUptakeByAgeHazNFSN25 = 
  ggplot(RegUptakeByAgeHazNFS %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=nsize)) +
  geom_hline(yintercept=0, color="grey50") +
  geom_vline(xintercept=17.5, linetype=2, color="grey50") +
  geom_vline(xintercept=20.5, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point(size=2) +
  scale_y_continuous("Women at risk") +
  scale_x_continuous("", breaks= seq(12,32,1))+
# coord_cartesian(ylim=c(-100, 200))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(1,0,.1,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

# Plot RI p-values for hazard coefs on uptake by age
RegUptakeByAgeHazNFSP25 = 
  ggplot(RegUptakeByAgeHazNFS %>%
           filter(LT==T, Age>=16, Age<=25),
         # filter(Age<26, Age>14),
         aes(x=Age, y=pRI)) +
  geom_hline(yintercept=0, color="grey50") +
  geom_hline(yintercept=1, color="grey50") +
  geom_vline(xintercept=17.5, linetype=2, color="grey50") +
  geom_vline(xintercept=20.5, linetype=2, color="grey50") +
  geom_line(linetype=2) +
  geom_point(size=2) +
  scale_y_continuous("P-values", breaks=seq(0,1,.2)) +
  scale_x_continuous("", breaks= seq(12,32,1))+
  coord_cartesian(ylim=c(0,1))+
  theme(panel.grid.minor = element_blank(),
        # plot.margin = unit(c(1,0,.1,.1), "cm"),
        legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

pdf("RegUptakeByAgeHazNFSMulti25.pdf", width=6.5, height=7.2)
grid.newpage()
grid.draw(rbind(ggplotGrob(RegUptakeByAgeHazNFSCoef25), 
                ggplotGrob(RegUptakeByAgeHazNFSP25), 
                ggplotGrob(RegUptakeByAgeHazNFSN25),
                size = "last"))
dev.off()

